
* Sample-window conditions, kept as unevaluated expressions in Date.
* They are expanded wherever the macros are referenced further down.
local oisstart "Date >= ym(2003,8)"

local precrisis "Date <= ym(2007,6)"
* the post-crisis window is cut off at the end of 2020
local postcrisis "inrange(Date, ym(2010,7), ym(2020,12))"


* scratch files used to stage intermediate datasets
tempfile temp temp2 temp3

* import and format HKM factor series
* The result is staged in `temp'; Date still holds the CSV's yyyymm column.

//import delimited "./Data/BasisFactor/He_Kelly_Manela_Factors_monthly.csv", varnames(1) clear
import delimited "./DataSkeleton/AssetClasses/He_Kelly_Manela_Factors_monthly.csv", varnames(1) clear

rename yyyymm Date
* intermediary_value keeps its name. The original code renamed it to itself;
* this explicit check keeps the "column must exist" guarantee without the
* self-rename.
confirm variable intermediary_value
rename intermediary_capital_risk_factor hkm_factor
sort Date

save `temp', replace

* Stage the FF6 series in `temp2', keyed on Date (the CSV's yyyymm column).
import delimited using "./DataSkeleton/AssetClasses/FF6.csv", varnames(1) clear
rename f6* FF6*
rename yyyymm Date
sort Date

save `temp2', replace

* Take the us_bonds* columns from the HKM factors-and-test-assets file,
* relabel them OldBonds*, and stage them in `temp3'.
import delimited using "./DataSkeleton/AssetClasses/He_Kelly_Manela_Factors_And_Test_Assets_monthly.csv", varnames(1) clear

keep yyyymm us_bonds*

rename yyyymm Date
rename us_bonds* OldBonds*

sort Date
save `temp3', replace


* Load the monthly factors/test-assets panel and shorten its variable names.
//use "./Data/BasisFactor/Factors_And_Test_Assets_Monthly.dta", clear
use "./OutputInterim/Factors_And_Test_Assets_Monthly.dta", clear

//names shortened in final version of code for R/stata compatibility
drop dd* f_ois* f_ibor* 
//drop dd* factor_ois* factor_ibor* 

* 3M portfolio returns. Each wildcard pattern is renamed to a single fixed
* name, so it has to match exactly one variable.
local stems classic single top5 equal
local tags  return2 3v3    top5 top6
forvalues k = 1/4 {
	local stem : word `k' of `stems'
	local tag  : word `k' of `tags'
	rename port_ois_ret_`stem'*3M basis_`tag'
	rename port_ibor_ret_`stem'*3M ibor_`tag'
}

* forward-arbitrage returns: whatever precedes _USD_ (e.g. JPY, AUD) is moved
* to the end of the new name
rename *_USD_ois_ret_1M_fwd_3M ret_fwdarb_o3m*
rename *_USD_ois_ret_1M_fwd_1M ret_fwdarb_o1m*
rename *_USD_ibor_ret_1M_fwd_3M ret_fwdarb_i3m*

* the JPY 3M OIS series is pulled out under its own name
rename ret_fwdarb_o3mJPY basis_JPYUSD

** remove AUD OIS 3M 
drop ret_fwdarb_o3mAUD


* Attach the three staged files (HKM factors, FF6, OldBonds) in turn.
* unmatched(master) keeps panel rows that have no match in the staged file.
foreach staged in temp temp2 temp3 {
	mmerge Date using ``staged'', unmatched(master)
	drop _merge
}

* put dates into Stata ym format

* Date is a yyyymm number at this point: split it into year and month
generate year = floor(Date/100)
generate month = mod(Date, 100)

* NOTE(review): qe is 1 in months 1, 4, 7 and 10, while ye is 1 in December;
* confirm which month convention was intended for qe. Both flags are dropped
* later without being used in an active regression.
generate qe = (mod(month,3) == 1)
generate ye = (month == 12)

* rebuild Date as a monthly (%tm) date and move it to the front
drop Date
generate Date = ym(year,month)
format Date %tm

drop month year
order Date
sort Date


* 0/1 indicators from the precrisis / postcrisis conditions
generate precrisis = `precrisis'
generate post_crisis = `postcrisis'

* Attach the basis shock series and the standardized top-6 PC1 portfolio
* return, then stage the panel in `temp'.
sort Date
mmerge Date using "./OutputInterim/BasisShock1M_Y.dta", unmatched(master)
drop _merge

* the merged-in basis_shock is kept as basis_resid; a different basis_shock
* is built later in this file
rename basis_shock basis_resid

sort Date

* ukeep() brings in only the listed variable from the using file
mmerge Date using "./OutputInterim/PCportsMonthly.dta", unmatched(master) ukeep(pc1top6_ois_stand_1M_fwd_3M)
* drop the merge marker here as after every other mmerge, so that no stale
* _merge is saved into `temp'
drop _merge

sort Date
rename pc1top6_ois_stand_1M_fwd_3M pc1top6_stand


save `temp', replace


* Monthly returns on the Bloomberg commodity sub-indices, joined onto the
* panel staged in `temp'.
local comm_cats energy grain ind_metal livstock prec_metal soft
import excel using "./DataSkeleton/AssetClasses/bloomberg_commodity_subindex.xlsx", sheet("clean") firstrow allstring clear

* every column was read as a string: parse the day-month-year date first
generate Dated = date(date,"DMY")
format Dated %td
drop date

sort Dated

generate Date = mofd(Dated)
format Date %tm

* then the index levels; force turns non-numeric entries into missing
destring `comm_cats', replace force

* last non-missing level of each index within the month
collapse (lastnm) `comm_cats' , by(Date)

sort Date
tsset Date

* simple month-over-month return, named after the first three letters of
* the category; the level itself is discarded
foreach idx of local comm_cats {
	local abbr = substr("`idx'",1,3)
	generate SmallCmNew_`abbr' = `idx' / L.`idx' - 1
	drop `idx'
}

* unmatched(using): panel months without commodity data are retained
mmerge Date using `temp', unmatched(using)
drop _merge

save `temp', replace

//clean_arbitrage_basis_pc1_monthly_v2.dta
//use "./Data/LiteratureData/WD_RawData/clean_arbitrage_basis_pc1_monthly_final.dta", clear
//use "./Data/LiteratureData/WD_RawData/clean_arbitrage_basis_pc1_monthly_v2.dta", clear

* Re-save the monthly pc1 file under a %tm Date key.
use "./OutputInterim/clean_arbitrage_basis_pc1_monthly_final.dta", clear

/*gen month = mofd(date)
sort date
	
collapse (lastnm) pc1, by(month)*/

* the file's month variable becomes the Date key
rename month Date
sort Date
format Date %tm

save "./OutputInterim/PC1Monthly.dta", replace

* Reload the staged panel and attach pc1.
use `temp', clear 

mmerge Date using "./OutputInterim/PC1Monthly.dta", unmatched(master) ukeep(pc1)
drop _merge

sort Date

tsset Date

* post-crisis comparison of spot_basis and pc1, in levels and first differences
correlate spot_basis pc1 if post_crisis 
correlate D.spot_basis D.pc1 if post_crisis 

regress pc1 spot_basis if post_crisis 
regress D.pc1 D.spot_basis if post_crisis 

* residual from regressing pc1 on its own lag over the post-crisis sample
regress pc1 L.pc1 if post_crisis
predict pc1shock, residuals

generate lag_basis = L.spot_basis

* same for spot_basis
regress spot_basis lag_basis if post_crisis // qe ye
predict basis_ar1, residuals


* the levels and the calendar flags are no longer needed
drop spot_basis pc1 qe ye 

* Line up dates, switch sign, and rescale shock
* NOTE(review): this file creates basis_return2, not basis_return; presumably
* basis_return either arrives with the loaded panel or resolves through
* variable abbreviation -- confirm.
generate basis_shock = -L.basis_return if post_crisis
generate basisresid_shock = -L.basis_resid if post_crisis
generate pc1ret = -L.pc1top6_stand

* blank every shock / basis series outside the post-crisis window
foreach v of varlist basis_shock basisresid_shock pc1ret pc1shock basis_ar1 basis_JPYUSD basis_3v3 basis_top5 basis_top6 {
	replace `v' = . if ~post_crisis
}

* rescale by 100
* NOTE(review): basis_JPYUSD keeps its sign while the three portfolio series
* are negated -- confirm this asymmetry is intended.
replace basis_JPYUSD = basis_JPYUSD * 100
foreach v of varlist basis_3v3 basis_top5 basis_top6 {
	replace `v' = -`v' * 100
}



* rescale pc1shock so its standard deviation matches that of basis_shock
summarize basis_shock if Date <= ym(2020,12)
local bs_sd = r(sd)
summarize pc1shock if Date <= ym(2020,12)
local pc_sd = r(sd)

replace pc1shock = pc1shock * `bs_sd' / `pc_sd'


* give everything a common prefix
* old_stems[k]* is renamed to ret_<new_stems[k]>*, in list order
local old_stems FF25 FF6 US_gov Sovereign FX_all FX_dev CDS Corp_bond Comm SmallCmNew OldBonds
local new_stems FF25 FF6 USgov  Sovereign FXall  FXdev  CDS USCorp    Comm SmallCmNew OldBonds
local n_stems : word count `old_stems'
forvalues k = 1/`n_stems' {
	local from : word `k' of `old_stems'
	local to   : word `k' of `new_stems'
	rename `from'* ret_`to'*
}

* the two option sets (note the differing capitalisation in the source names)
rename option_18_full_new* ret_Onew100*

rename Option_18_90adj_new* ret_Onew90*


* monthly risk-free rate: lagged USD_ois_1M / 12 where the oisstart condition
* holds, lagged Tbill_1M / 12 otherwise
generate rf_rate = cond(`oisstart', L.USD_ois_1M, L.Tbill_1M) / 12

* one row per Date x asset, with the return in ret_
reshape long ret_, i(Date) j(asset) string


save `temp', replace



//generate small commodity portfolios from the 23 HKM used
//not used in final version of the paper
//these are from https://data.bloomberglp.com/professional/sites/10/BCOM-Methodology.pdf
/*import excel "CommodityCategories.xlsx", sheet("Sheet1") firstrow allstring clear
gen assetn = _n
egen gnum = group(Category)
drop CommodityName 
sort assetn
save `temp2', replace

use `temp', clear

gen assetn = substr(asset,-2,.) if regexm(asset,"Commod")
replace assetn = substr(asset,-1,.) if regexm(assetn,"_")
destring assetn, replace force

mmerge assetn using `temp2', unmatched(master)
drop _merge

expand 2 if regexm(asset,"Commod"), gen(dup)

//tostring gnum, gen(gstr)

gen assetnew = "SmallComm_" + substr(Category,1,3) if regexm(asset,"Commod")

replace asset = assetnew  if dup == 1*/

* Reduce the long panel to returns plus the factor series, then build
* excess returns in percent.
sort Date asset

// get rid of extra variables
collapse (mean) ret_ (firstnm) Mkt rf_rate intermediary_value lag_basis basis_shock basisresid_shock pc1ret pc1shock basis_ar1 basis_JPYUSD basis_3v3 basis_top5 basis_top6 hkm_factor post_crisis, by(Date asset)
 

* default: return minus the risk-free rate
gen eret_ = ret_ - rf_rate
* FF6 returns are divided by 100 before subtracting rf_rate.
* The full name eret_ is used here: the original wrote "eret", which only
* worked through variable abbreviation.
replace eret_ = ret_ / 100 - rf_rate if regexm(asset,"FF6") 
* these asset classes are used as-is, with no rf_rate subtracted
* NOTE(review): "SmallComm" does not match the SmallCmNew_* assets built from
* the commodity sub-indices, so those still get ret_ - rf_rate; confirm.
replace eret_ = ret_ if regexm(asset,"FX") | regexm(asset,"CDS") | regexm(asset,"Commod") | regexm(asset,"SmallComm")
replace eret_ = ret_ if regexm(asset,"fwdarb")
replace eret_ = . if regexm(asset,"fwdarb") & ~post_crisis
replace Mkt = Mkt - rf_rate
replace intermediary_value = intermediary_value - rf_rate


* express everything in percent
replace intermediary_value = intermediary_value * 100
replace Mkt = Mkt * 100
replace eret_ = eret_ * 100

replace hkm_factor = hkm_factor * 100


rename intermediary_value int_value

label var Mkt Market
label var hkm_factor "HKM Factor"
label var basis_shock "Neg. Fwd CIP Ret."
label var basisresid_shock "Basis Residual"
label var pc1shock "PC1 Residual"
label var int_value "Int. Equity"
label var basis_ar1 "Basis AR1 Residual"
label var pc1ret "Fwd. CIP Ret. PC1"

* only the excess return is carried forward
drop ret_*

** match up dates within asset classes
//drop if Date < ym(1991,5) & regexm(asset,"US") 
* assets whose name contains US or OldBonds start in 1988m9
//1974,m8 is the original HKM cutoff (OldBonds)
replace eret_ = . if Date < ym(1988,9) & regexm(asset,"US|OldBonds")
drop if regexm(asset,"USgov_01")

* "Commod" also covers any name containing SmallCommod
replace eret_ = . if Date < ym(1991,2) & regexm(asset,"Commod|SmallCmNew")
replace eret_ = . if Date < ym(1996,1) & regexm(asset,"Onew")

** uniform end dates
drop if Date > ym(2020,12)


* excluded series: CHF among the fwdarb_o assets, CAD among the fwdarb_i
* assets, and CDS_6 / CDS_7
drop if strpos(asset,"fwdarb_o") > 0 & strpos(asset,"CHF") > 0
drop if strpos(asset,"fwdarb_i") > 0 & strpos(asset,"CAD") > 0

drop if inlist(asset, "CDS_6", "CDS_7")


* back to one row per Date, one eret_<asset> column per asset
reshape wide eret_, i(Date) j(asset) string


** Code to choose small portfolios

*** Options
**lc: low strike call, mp: medium strike put, etc...
** 30/60/90: days to expiry
* opt_tags[k] is a copy of eret_Onew100_<opt_ids[k]>
local opt_tags lc30 mc30 hc30 lc90 mc90 hc90 lp30 mp30 hp30 lp90 mp90 hp90
local opt_ids  1    2    3    7    8    9    10   11   12   16   17   18
forvalues k = 1/12 {
	local tag : word `k' of `opt_tags'
	local id  : word `k' of `opt_ids'
	generate eret_SmallOpt_`tag'=eret_Onew100_`id'
}


save "./OutputInterim/CrossSectionData.dta", replace



* 2x1 vector of means: int_value in row 1, pc1ret in row 2
* (no sample restriction on these two summaries)
matrix means = J(2,1,0)
summarize int_value
matrix means[1,1] = r(mean)
summarize pc1ret
matrix means[2,1] = r(mean)
* an optional adjustment to the pc1ret mean is left disabled:
//matrix means[2,1] = means[2,1] + 0.0045

* correlation, then covariance, of the two series over the postcrisis window
correlate int_value pc1ret if `postcrisis'

correlate int_value pc1ret if `postcrisis', covariance

matrix covmat = r(C)

matrix list covmat

* inverse covariance matrix times the mean vector, scaled by 100
matrix temp = inv(covmat) * means * 100

matrix list temp


//some stats for carry
* Sharpe ratios are mean / sd * sqrt(12), full sample and post-crisis.
summarize Mkt
local mkt_sharpe = `r(mean)' / `r(sd)' * sqrt(12)

summarize Mkt if post_crisis
local mkt_sharpe_post = `r(mean)' / `r(sd)' * sqrt(12)

* carry: FXdeveloped portfolio 01 minus portfolio 05
generate carry_ret = eret_FXdeveloped_01 - eret_FXdeveloped_05

summarize carry_ret
local carry_sharpe = `r(mean)' / `r(sd)' * sqrt(12)

summarize carry_ret if post_crisis
local carry_sharpe_post = `r(mean)' / `r(sd)' * sqrt(12)

display "full sample: mkt `mkt_sharpe', carry `carry_sharpe'"
display "post sample: mkt `mkt_sharpe_post', carry `carry_sharpe_post'"

correlate Mkt carry_ret pc1ret if post_crisis


//make quarterly panel
reshape long eret_, i(Date) j(asset) string

* quarter containing each month
generate quarter = qofd(dofm(Date))
format quarter %tq
sort quarter Date




* Convert to log gross returns in percent. fwdarb assets are left unchanged.
* rf_rate is converted last because the other lines use it in its
* unconverted form.
replace eret_ = ln(1 + (eret_ /100+ rf_rate)) * 100 if ~regexm(asset,"fwdarb")
foreach v of varlist Mkt int_value {
	replace `v' = ln(1 + (`v'/100+ rf_rate)) * 100
}
replace rf_rate = ln(1 + rf_rate) * 100

//basis_ar1 and basisresid_shock are included only because they are assumed
//to exist in the GMM code, not because they are used
* Sum within quarter and asset, counting the non-missing months behind each sum
collapse (count) cnt_eret=eret_ cnt_basis=basis_shock cnt_int=int_value cnt_Mkt=Mkt cnt_pc1ret=pc1ret ///
	(sum) eret_ rf_rate Mkt int_value basis_shock basisresid_shock basis_ar1 pc1ret, by(quarter asset)


sort quarter asset

* dlev from the aem_update file, kept as aem
mmerge quarter using "./OutputInterim/aem_update.dta", unmatched(master) ukeep(quarter dlev)
drop _merge
rename dlev aem

mmerge quarter using "./OutputInterim/He_Kelly_Manela_Factors_quarterly.dta", unmatched(master)
drop _merge

* a quarterly factor value is kept only if all three months were present
local q_series Mkt     int_value basis_shock pc1ret
local q_counts cnt_Mkt cnt_int   cnt_basis   cnt_pc1ret
forvalues k = 1/4 {
	local series  : word `k' of `q_series'
	local counter : word `k' of `q_counts'
	replace `series' = . if `counter' < 3
	drop `counter'
}

* asset-quarters with fewer than three monthly returns are removed entirely
drop if cnt_eret < 3
drop cnt_eret

rename quarter Date

//rounding avoids reshape errors
* Convert the summed log returns back to simple returns in percent.
* rf_rate goes first because the excess-return lines subtract the converted value.
replace rf_rate = round(100*(exp(rf_rate/100) - 1),1e-8)
foreach v of varlist Mkt int_value {
	replace `v' = round(100*(exp(`v'/100) - 1) - rf_rate,1e-8)
}
replace eret_ = 100*(exp(eret_/100) - 1) - rf_rate if ~regexm(asset,"fwdarb")

* int_equity and hkm_factor were merged in from the quarterly HKM file
* (presumably; neither is created in this file) and are rescaled here
replace int_equity = 100*int_equity - rf_rate
replace hkm_factor = hkm_factor * 100

//gmm code expects rf_rate in this scale
replace rf_rate = rf_rate / 100

* one row per quarter, one eret_<asset> column per asset
reshape wide eret_, i(Date) j(asset) string

save "./OutputInterim/CrossSectionDataQtr.dta", replace

